This is hopefully the results section for the Study 2 NSE & SE babies watching ASL Stories. We have two main factors:
Ages of babies are from 5 to 14.99 months. That means we lose two “old” CODA babies - Janine (16 mo) and za02 (23 mo). The dataset also contains factors for younger v. older babies IF we want to do age group splits. I do not recommend it because we lose all significance that way and we have more precise age information, better than brute groups. IF we do use it, the baby age groups are such:
library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)
# Grab data that was produced in 03importcleanbabies.Rmd
babies <- read_feather("cleanedbabyeyedata.feather") %>%
mutate(age = age*12) %>%
select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
rename(name = participant) %>%
mutate(agegroup = case_when(
age <= 8.99 ~ "younger",
age >= 9.0 & age < 15 ~ "older"
)) %>%
filter(!is.na(agegroup)) %>%
mutate(language = case_when(
language == "english" ~ "NSE",
language =="sign" ~ "SE"
)) %>%
rename(lang = language) %>%
mutate(lang2 = case_when(
name == "aubrey CODA 11m" ~ "HSE",
name == "GemmaF_11_4_13_CODA" ~ "HSE",
name == "Hannah_CODA_7m" ~ "HSE",
name == "ke11es12_7m_MomTerp" ~ "LSE",
name == "Miles_14m_SE" ~ "HSE",
name == "Parker 6 m SIGNING" ~ "HSE",
name == "Penn 6 months SIGN EXPOSED" ~ "HSE",
name == "Trinity 8m" ~ "LSE",
name == "Victoria07_18_17" ~ "LSE",
name == "Wells_8mos" ~ "LSE",
TRUE ~ lang
))
# filter(name != "Victoria07_18_17")
babiesinfo <- babies %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang) %>%
summarise(N = n(),
age_mean = mean(age),
sd = sd(age),
min = min(age),
max = max(age))
genders <- babies %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang, gender) %>%
summarise(N = n()) %>%
spread(gender, N)
babiesinfo <- left_join(babiesinfo, genders) %>%
select(lang, N, Female, Male, age_mean, sd, min, max) %>%
print()
babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))
# t-test for NSE v. SE ages
summary(lm(data = distinct(select(babies, age, lang)), age ~ lang))
Call:
lm(formula = age ~ lang, data = distinct(select(babies, age,
lang)))
Residuals:
Min 1Q Median 3Q Max
-3.103 -2.206 -1.456 2.624 5.324
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.8764 0.8774 8.977 1.21e-07 ***
langSE 2.1865 1.4070 1.554 0.14
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.91 on 16 degrees of freedom
Multiple R-squared: 0.1311, Adjusted R-squared: 0.07684
F-statistic: 2.415 on 1 and 16 DF, p-value: 0.1397
# IF we do age groups, use this code
#
# babiesinfo <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup) %>%
# summarise(N = n(),
# age_mean = mean(age),
# sd = sd(age),
# min = min(age),
# max = max(age))
#
# genders <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup, gender) %>%
# summarise(N = n()) %>%
# spread(gender, N)
#
# babiesinfo <- left_join(babiesinfo, genders) %>%
# select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
# print()We calculated percentages based on overall clip length as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length.
The chart below shows little differences based on factors Age, Language, or Direction. That’s good, means the videos were equally engaging for all babies, right?
babies$lang <- as.factor(babies$lang)
babies_overall_looking <- babies %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(percent = sum(percent)) # gets total looking percent for each trial for each baby
# Table of means
babies_overall_looking %>%
group_by(name, lang, direction) %>%
summarise(percent = mean(percent)) %>% # get average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_percent = mean(percent),
count = n(),
sd = sd(percent),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
ggplot(babies_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Video Attention") +
xlab("age (months)") +
ylab("percent looking") +
theme_bw() +
scale_y_continuous(limits = c(0,1), labels = percent)
# Plot
# babies_overall_looking %>%
# group_by(lang, direction, name) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(lang, direction) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# # facet_wrap("lang") +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")
# babies_overall_looking %>%
# ggplot(aes(x = lang, y = percent, fill = direction)) +
# facet_wrap("agegroup") +
# geom_violin()A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much the babies looked at the stimuli.
Model failed to converge with max|grad| = 0.0100956 (tol = 0.002, component 1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
percent ~ age + lang * direction + (direction | name) + (direction |
story)
Data: babies_overall_looking
REML criterion at convergence: -109.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.17413 -0.71098 -0.01308 0.70596 2.47194
Random effects:
Groups Name Variance Std.Dev. Corr
name (Intercept) 0.0103068 0.10152
directionreversed 0.0009225 0.03037 1.00
story (Intercept) 0.0086390 0.09295
directionreversed 0.0039253 0.06265 -0.76
Residual 0.0330605 0.18183
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.678349 0.086566 28.389111 7.836 1.41e-08 ***
age 0.002112 0.008899 23.169957 0.237 0.814
langSE -0.049561 0.050340 23.826263 -0.985 0.335
directionreversed -0.033420 0.034809 12.356798 -0.960 0.355
langSE:directionreversed 0.019252 0.041719 94.727001 0.461 0.646
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.853
langSE -0.067 -0.181
dirctnrvrsd -0.228 0.005 0.069
lngSE:drctn 0.034 0.000 -0.127 -0.497
convergence code: 0
Model failed to converge with max|grad| = 0.0100956 (tol = 0.002, component 1)
Computing profile confidence intervals ...
non-monotonic profile for .sig02non-monotonic profile for .sig05bad spline fit for .sig02: falling back to linear interpolationbad spline fit for .sig05: falling back to linear interpolation
2.5 % 97.5 %
.sig01 0.0620478402 0.14127848
.sig02 -1.0000000000 1.00000000
.sig03 0.0000000000 0.07371618
.sig04 0.0517443908 0.14795142
.sig05 -1.0000000000 0.62776099
.sig06 0.0001206534 0.11509190
.sigma 0.1681790045 0.19678094
(Intercept) 0.5182792219 0.84146936
age -0.0146536022 0.01861433
langSE -0.1442384160 0.04487979
directionreversed -0.0983969980 0.03156203
langSE:directionreversed -0.0624469660 0.10063677
Now we’ll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let’s make a boxplot of all AOIs.
#Recalculate percent
babies <- babies %>%
ungroup() %>%
select(-percent) %>%
group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender) %>%
mutate(totalsec = sum(secs)) %>%
group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender, aoi) %>%
mutate(percent = secs/totalsec)
# Boxplot
babies %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))It appears two important AOIs are MidChestTop and MidFaceBottom. Let’s look again only at midline AOIs:
midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")
babies %>%
filter(aoi %in% midline) %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("Midline AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))I’m going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.
MidChestTop:
MidFaceBottom:
babies %>%
filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
ggplot(aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(0,1), labels = percent) +
theme_bw() +
# theme(panel.grid.major.x = element_blank()) +
facet_grid(aoi ~ lang) +
ggtitle("AOI Attention") +
xlab("") +
ylab("percent looking")
midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidChestTop"))
summary(midchesttop_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(babies, aoi == "MidChestTop")
REML criterion at convergence: -215.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.0298 -0.7175 -0.1352 0.6298 2.6697
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.0115078 0.1073
story (Intercept) 0.0002281 0.0151
Residual 0.0253723 0.1593
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.123866 0.076757 23.371972 1.614 0.12001
age 0.028038 0.008448 22.005650 3.319 0.00312 **
langSE -0.132994 0.050397 27.850283 -2.639 0.01346 *
directionreversed -0.044820 0.022405 320.419630 -2.000 0.04629 *
langSE:directionreversed 0.039943 0.034787 318.190116 1.148 0.25175
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.912
langSE -0.092 -0.172
dirctnrvrsd -0.139 -0.001 0.213
lngSE:drctn 0.085 0.006 -0.336 -0.644
Computing profile confidence intervals ...
2.5 % 97.5 %
.sig01 0.07183592 0.1408795019
.sig02 0.00000000 0.0455736684
.sigma 0.14721089 0.1721220249
(Intercept) -0.02299300 0.2706422164
age 0.01187038 0.0442054478
langSE -0.22928875 -0.0366528701
directionreversed -0.08864808 -0.0008204047
langSE:directionreversed -0.02820489 0.1081961718
#ggcoef(midchesttop_lm)
midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(babies, aoi == "MidFaceBottom")
REML criterion at convergence: -159.7
Scaled residuals:
Min 1Q Median 3Q Max
-2.42568 -0.76861 -0.01802 0.69751 2.50871
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.019840 0.14085
story (Intercept) 0.002588 0.05088
Residual 0.028320 0.16829
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.327355 0.099254 25.201834 3.298 0.0029 **
age -0.008293 0.010813 22.878745 -0.767 0.4510
langSE 0.193677 0.063358 26.931393 3.057 0.0050 **
directionreversed 0.038757 0.023800 317.155595 1.628 0.1044
langSE:directionreversed -0.052648 0.036870 315.966430 -1.428 0.1543
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.902
langSE -0.082 -0.176
dirctnrvrsd -0.113 -0.001 0.179
lngSE:drctn 0.069 0.005 -0.283 -0.647
Computing profile confidence intervals ...
2.5 % 97.5 %
.sig01 0.098862423 0.18348410
.sig02 0.024985536 0.09798262
.sigma 0.155497012 0.18175853
(Intercept) 0.136571835 0.51798698
age -0.029093105 0.01252478
langSE 0.071974034 0.31548548
directionreversed -0.007931901 0.08534794
langSE:directionreversed -0.124894091 0.01963867
#ggcoef(midfacebottom_lm)
# Bar chart
# babies %>%
# filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
# group_by(agegroup, lang, direction, name, aoi) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction, aoi) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_grid(aoi ~ agegroup) +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")Next, we’ll define a Face-Chest Ratio (FCR) such that:
We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.
babies_fcr <- babies %>%
ungroup() %>%
spread(aoi, percent) %>%
group_by(name, age, agegroup, lang, gender, direction, story, repetition) %>%
summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
fcr = (face - chest) / (face + chest))
# Table of means
babies_fcr %>%
group_by(lang, direction, name) %>%
summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_fcr = mean(fcr), # gets group averages
count = n(),
sd = sd(fcr),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(babies_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Face-Chest Ratios") +
xlab("") +
ylab("FCR")
# Bar chart
# babies_fcr %>%
# group_by(agegroup, lang, direction, name) %>%
# summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction) %>%
# summarise(mean_fcr = mean(fcr), # gets group averages
# count = n(),
# sd = sd(fcr),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_fcr, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(-1,1)) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_wrap("agegroup") +
# ggtitle("Face-Chest Ratios") +
# xlab("") +
# ylab("FCR")What will a linear mixed model tell us? (with no age interactions)
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = babies_fcr)
summary(fcr_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: fcr ~ age + lang * direction + (1 | name) + (1 | story)
Data: babies_fcr
REML criterion at convergence: 435.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.2099 -0.7696 0.1072 0.6216 2.3529
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.14617 0.3823
story (Intercept) 0.01376 0.1173
Residual 0.15701 0.3962
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.03078 0.26449 24.55226 0.116 0.9083
age -0.03995 0.02900 22.79897 -1.378 0.1817
langSE 0.45277 0.16836 25.88319 2.689 0.0124 *
directionreversed 0.11633 0.05604 316.92752 2.076 0.0387 *
langSE:directionreversed -0.14108 0.08682 315.76750 -1.625 0.1051
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.907
langSE -0.077 -0.179
dirctnrvrsd -0.100 -0.001 0.159
lngSE:drctn 0.061 0.004 -0.250 -0.647
Computing profile confidence intervals ...
2.5 % 97.5 %
.sig01 0.271054322 0.49505092
.sig02 0.057128269 0.22865024
.sigma 0.366118773 0.42796786
(Intercept) -0.477091044 0.53858818
age -0.095676033 0.01579032
langSE 0.129600591 0.77612687
directionreversed 0.006295529 0.22595187
langSE:directionreversed -0.311081807 0.02922941
# https://jofrhwld.github.io/teaching/courses/2017_lsa/lectures/Session_8.nb.html
# library(boot)
# fcr_boot <- bootMer(fcr_lm, FUN = fixef, nsim = 1000)
# fcr_boot
# boot.ci(fcr_boot, index = 3, type = "perc")
#ggcoef(fcr_lm)
#emmeans::emmeans(fcr_lm, list(pairwise ~ direction*lang), adjust = "none")
# fcrse_lm <- lmer(fcr ~ age + direction + (1|name) + (1|story), data = filter(babies_fcr, lang == "SE"))
# fcrnse_lm <- lmer(fcr ~ age + direction + (1|name) + (1|story), data = filter(babies_fcr, lang == "NSE"))
# summary(fcrse_lm)
# summary(fcrnse_lm)I would like a large table with all individual percent looking means for each AOI and the individual FCR values, with ages, gender, video group for each child. (collapsed across stories and trials)
# babies has the AOI info, babies_fcr has FCR
# Collapse across stories and trials
babies_spread <- babies %>%
group_by(name, lang, age, gender, direction, aoi) %>%
summarise(percent = mean(percent, na.rm = T)) %>%
spread(aoi, percent)
babies_fcr_spread <- babies_fcr %>%
group_by(name, lang, age, gender, direction) %>%
summarise(fcr = mean(fcr, na.rm = T))
babies_large_table <- babies_spread %>%
left_join(babies_fcr_spread)
babies_large_table %>%
write_csv("large_table_babies.csv")And now heat maps!
heatmap_babies <- babies %>%
filter(aoi %in% midline) %>%
ungroup() %>%
group_by(lang, name, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
ungroup() %>%
mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")))
ggplot(heatmap_babies, aes(x = lang, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.4501), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ direction) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))
ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.4501), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ lang) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))heatmap_babies2 <- babies %>%
filter(aoi %in% midline) %>%
ungroup() %>%
group_by(lang, name, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
ungroup() %>%
mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")))
ggplot(heatmap_babies2, aes(x = lang, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.4501), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language (Collapsed by Direction)") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
geom_tile(aes(fill = percent),
color="dark gray",
size = 0.25,
na.rm = T,
height = rep(c(10,4,1,1,1,1,1,1),4)) +
scale_fill_gradient(low = "#ffffff",
high = "#08519c",
space = "Lab",
limits = c(0,.4501),
labels = percent,
name = "looking time",
na.value = "grey50") +
facet_grid(. ~ lang) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand=c(0,0)) +
theme_bw() +
theme(text = element_text(size = 20, family = "xkcd"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
strip.text = element_blank(),
panel.border = element_rect(size = 2),
title = element_blank()) +
guides(color = FALSE, fill = FALSE)I want to try to visualize reversal effects a different way. Maybe this.
# Get participant-level data
babies_fcr2 <- babies_fcr %>%
group_by(name, age, lang, direction) %>%
summarise(fcr = mean(fcr))
# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = babies_fcr2)
# summary(reversal_effect_lm)
ggplot(babies_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
geom_point() +
geom_line(aes(group = name)) +
facet_grid(. ~ lang) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw()Or a reversal effect chart? Okay, so this chart tells us overall there really wasn’t much of a reversal effect for SE babies, they’re all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories!
# Get participant-level data
babies_fcr3 <- babies_fcr2 %>%
spread(direction, fcr) %>%
group_by(name, age, lang) %>%
mutate(diff = (forward - reversed))
ggplot(babies_fcr3, aes(x = age, y = diff, color = lang)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
ggtitle("Reversal Effect") +
ylab("Forward FCR - Reversed FCR")This analysis of within-subject variation here:
# First get the mean of each trial, THEN the participant-level means
within_subjects <- babies_fcr %>%
group_by(name, lang, direction, story, repetition) %>%
summarise(fcr = mean(fcr, na.rm = TRUE),
count = n()) %>%
group_by(name, lang, direction) %>%
summarise(mean = mean(fcr, na.rm = TRUE),
se = sd(fcr, na.rm = TRUE)/sqrt(n()),
count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
select(-se, -count) %>%
spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
select(-mean, -count) %>%
spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))
# Now let's plot
lims <- c(-1,1)
within_subjects %>%
ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
geom_abline() +
geom_point(size = 2) +
geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
theme_bw() +
theme(aspect.ratio = 1) +
scale_x_continuous("forward", limits = c(-1,1)) +
scale_y_continuous("reversed", limits = c(-1,1)) +
ggtitle("FCR Means") +
facet_wrap("lang")And a classic box/error plot with age collapsed.
babies_fcr2 %>%
group_by(lang, direction) %>%
summarise(fcr_mean = mean(fcr),
sd = sd(fcr),
n = n(),
se = sd/sqrt(n)) %>%
ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
scale_y_continuous(limits = c(-0.5, 0.5)) +
theme_linedraw()Registering fonts with R
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table
babies_fcr2 %>%
group_by(lang, direction) %>%
summarise(fcr_mean = mean(fcr),
sd = sd(fcr),
n = n(),
se = sd/sqrt(n)) %>%
ggplot(aes(x = lang, y = fcr_mean, color = direction, fill = direction, group = direction)) +
geom_hline(yintercept = 0, size = 0.5) +
geom_point(size = 6, position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se),
size = 2,
position = position_dodge(0.4),
width = 0.3) +
scale_y_continuous(limits = c(-0.5, 0.5)) +
theme_linedraw() +
theme(text = element_text(size = 30),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.border = element_rect(size = 2),
axis.ticks.y = element_line(size = 0.5),
panel.grid.major.y = element_line(size = 0.5, color = "light gray", linetype = "dashed")) +
guides(color = FALSE, fill = FALSE)The biggest change is we lost the interaction between language and direction. For the ICSLA abstract we reported a strong interaction (p = 0.01), but now it’s p = 0.10. Shoot.
The interpretation here is that:
We’ll load the data from the childxydata.feather file made in 06rawxydata.Rmd. So any new babies, please run the first code block in 06 to include it. Then we’ll keep all the babies we also have in the AOI data group.
included <- babies %>%
ungroup() %>%
select(name) %>%
distinct() %>%
unlist()
xydata <- read_feather("../Child Data/childxydata.feather") %>%
rename(name = participant) %>%
filter(name %in% included)
# Get ages
ages <- read_csv("childrenages.csv") %>%
rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
mutate(age = age*12) %>%
mutate(agegroup = case_when(
age <= 8.99 ~ "younger",
age >= 9.0 & age < 15 ~ "older"
)) %>%
mutate(language = case_when(
language == "EnglishExposed" ~ "NSE",
language == "SignLanguageExposed" ~ "SE"
)) %>%
rename(lang = language) %>%
select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
separate(condition, into = c("story", "clip", "direction")) %>%
unite("story", c("story", "clip")) %>%
mutate(direction = case_when(
direction == "ER" ~ "reversed",
direction == "FW" ~ "forward"
)) %>%
mutate(name = factor(name),
group = factor(group),
gender = factor(gender),
lang = factor(lang),
story = factor(story),
direction = factor(direction),
mark = factor(mark),
trial = factor(trial),
repetition = factor(repetition),
agegroup = factor(agegroup))Let’s check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as “Global Looking” we have above, really, but w raw xy data.
xy_overall <- xydata %>%
filter(!is.na(x)) %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(data_points = n()) # gets total looking percent for each trial for each baby
# Table of means
xy_overall %>%
group_by(name, lang, direction) %>%
summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_data_points = mean(data_points),
count = n(),
sd = sd(data_points),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Data Points") +
xlab("age (months)") +
ylab("data points recorded") +
theme_bw() A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much data was collected from the babies.
overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm) Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
data_points ~ age + lang * direction + (direction | name) + (direction |
story)
Data: xy_overall
REML criterion at convergence: 5528.4
Scaled residuals:
Min 1Q Median 3Q Max
-3.7591 -0.6647 -0.0082 0.6288 2.5534
Random effects:
Groups Name Variance Std.Dev. Corr
name (Intercept) 20854.2 144.41
directionreversed 800.9 28.30 1.00
story (Intercept) 22484.8 149.95
directionreversed 4209.1 64.88 -0.04
Residual 52394.3 228.90
Number of obs: 401, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 530.672 119.943 29.510 4.424 0.000121 ***
age 7.772 12.021 23.311 0.646 0.524272
langSE 9.965 68.206 23.876 0.146 0.885062
directionreversed -20.420 38.038 11.694 -0.537 0.601450
langSE:directionreversed -16.655 48.551 134.420 -0.343 0.732113
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.828
langSE -0.057 -0.186
dirctnrvrsd -0.045 -0.003 0.065
lngSE:drctn 0.027 0.002 -0.133 -0.499
Computing profile confidence intervals ...
non-monotonic profile for .sig02non-monotonic profile for .sig05bad spline fit for .sig02: falling back to linear interpolationbad spline fit for .sig05: falling back to linear interpolation
2.5 % 97.5 %
.sig01 94.37381 198.05145
.sig02 -1.00000 1.00000
.sig03 0.00000 79.33475
.sig04 83.06418 264.23195
.sig05 -1.00000 1.00000
.sig06 0.00000 141.67953
.sigma 212.91391 246.24259
(Intercept) 296.10665 769.95935
age -16.80438 31.72486
langSE -122.37062 144.10701
directionreversed -96.43798 55.97517
langSE:directionreversed -112.38520 78.76240
Now we’re going to run LMMs on babies’ raw:
But to do this we first trim each baby’s data, getting rid of the first 60 samples (0.5 secs) of each trial.
xydata <- xydata %>%
group_by(name,trial) %>%
slice(60:n())
iqr <- xydata %>%
group_by(name, age, lang, story, direction, trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE),
area = xIQR*yIQR)
head(iqr,20)Results tell us that there is a significant effect of age where horizontal spread grew narrower with older babies (p = 0.024; slope = -2.2 px/month). There was a marginal effect of language where SE babies have slightly narrower horizontal spread (11 px; p = 0.088). No effects of reversal or interactions.
xiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_xIQR = mean(xIQR), # gets group averages
count = n(),
sd = sd(xIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Horizontal Spread") +
xlab("") +
ylab("xIQR")
ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()Model failed to converge with max|grad| = 0.00320909 (tol = 0.002, component 1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: xIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 3839.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.1303 -0.4823 -0.1684 0.2619 9.8734
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 105.6711 10.2796
story (Intercept) 0.4599 0.6781
Residual 901.8340 30.0306
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 63.1895 8.5763 25.5875 7.368 8.82e-08 ***
age -2.1897 0.9339 23.1557 -2.345 0.0280 *
langSE -12.3760 6.0882 41.4445 -2.033 0.0485 *
directionreversed 1.8858 3.8626 372.0751 0.488 0.6257
langSE:directionreversed 0.1957 6.1817 369.3450 0.032 0.9748
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.901
langSE -0.121 -0.160
dirctnrvrsd -0.214 -0.005 0.308
lngSE:drctn 0.136 0.000 -0.503 -0.625
convergence code: 0
Model failed to converge with max|grad| = 0.00320909 (tol = 0.002, component 1)
Computing profile confidence intervals ...
Last two rows have identical or NA .zeta values: using minstepLast two rows have identical or NA .zeta values: using minstepLast two rows have identical or NA .zeta values: using minstepLast two rows have identical or NA .zeta values: using minstepLast two rows have identical or NA .zeta values: using minstepslightly lower deviances (diff=-4.54747e-13) detectedLast two rows have identical or NA .zeta values: using minstepslightly lower deviances (diff=-4.54747e-13) detectedLast two rows have identical or NA .zeta values: using minstepslightly lower deviances (diff=-4.54747e-13) detectednon-monotonic profile for .sig02bad spline fit for .sig02: falling back to linear interpolationcollapsing to unique 'x' values
2.5 % 97.5 %
.sig01 5.517731 14.3257965
.sig02 0.000000 6.2873171
.sigma 27.926919 32.2281330
(Intercept) 47.094920 79.2757838
age -3.940171 -0.4378617
langSE -23.852346 -0.8993574
directionreversed -5.692211 9.4417865
langSE:directionreversed -11.912654 12.3126452
We had a significant effect of age where vertical spread got narrower with older babies (p = 0.019, slope = -5.5 px/month). There were no effect of language, direction, or interactions.
yiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_yIQR = mean(yIQR), # gets group averages
count = n(),
sd = sd(yIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Vertical Spread") +
xlab("") +
ylab("yIQR")
ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()Model failed to converge with max|grad| = 0.00609049 (tol = 0.002, component 1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: yIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 4251
Scaled residuals:
Min 1Q Median 3Q Max
-2.7596 -0.5266 -0.2214 0.3696 5.9050
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 6.792e+02 26.06181
story (Intercept) 2.499e-04 0.01581
Residual 2.479e+03 49.79276
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 120.510 19.174 24.279 6.285 1.61e-06 ***
age -5.600 2.113 22.966 -2.650 0.0143 *
langSE -17.254 12.936 32.133 -1.334 0.1917
directionreversed 8.759 6.406 371.126 1.367 0.1724
langSE:directionreversed -5.694 10.251 370.658 -0.555 0.5789
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.911
langSE -0.096 -0.171
dirctnrvrsd -0.158 -0.004 0.240
lngSE:drctn 0.101 0.000 -0.393 -0.625
convergence code: 0
Model failed to converge with max|grad| = 0.00609049 (tol = 0.002, component 1)
Computing profile confidence intervals ...
Last two rows have identical or NA .zeta values: using minstepnon-monotonic profile for .sig02bad spline fit for .sig02: falling back to linear interpolationcollapsing to unique 'x' values
2.5 % 97.5 %
.sig01 17.067513 34.481568
.sig02 0.000000 10.380480
.sigma 46.299351 53.445276
(Intercept) 85.144804 155.891991
age -9.491015 -1.709599
langSE -41.342020 6.860432
directionreversed -3.780771 21.323088
langSE:directionreversed -25.806149 14.366311
Again, age is significant (p = 0.01) such that area gets smaller with age (about 500 sq. px. per month). No effect of language or direction or interactions.
area_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(area_mean = mean(area), # gets group averages
count = n(),
sd = sd(area),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Viewing Area") +
xlab("") +
ylab("Area (px^2)")
ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()boundary (singular) fit: see ?isSingular
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: area ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 8154.7
Scaled residuals:
Min 1Q Median 3Q Max
-1.6682 -0.2747 -0.1284 0.0305 13.4651
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 6.314e+06 2512.8269
story (Intercept) 1.156e-02 0.1075
Residual 5.292e+07 7274.7944
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 9286.13 2088.34 25.69 4.447 0.000148 ***
age -577.66 227.57 23.22 -2.538 0.018291 *
langSE -2119.67 1481.26 41.31 -1.431 0.159954
directionreversed 708.63 935.48 372.11 0.757 0.449231
langSE:directionreversed -299.21 1497.33 371.33 -0.200 0.841722
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.901
langSE -0.120 -0.160
dirctnrvrsd -0.213 -0.005 0.306
lngSE:drctn 0.135 0.000 -0.501 -0.625
convergence code: 0
boundary (singular) fit: see ?isSingular
Computing profile confidence intervals ...
2.5 % 97.5 %
.sig01 1345.191 3499.1946
.sig02 0.000 1266.8763
.sigma 6763.421 7808.4617
(Intercept) 5290.599 13280.6501
age -1013.228 -141.9810
langSE -4950.962 711.7087
directionreversed -1123.117 2542.6941
langSE:directionreversed -3236.019 2631.9398
medians <- iqr %>%
group_by(name,lang,direction) %>%
summarise(xIQR = mean(xIQR,na.rm=TRUE),
yIQR = mean(yIQR,na.rm=TRUE),
xmed = mean(xmed,na.rm=TRUE),
ymed = mean(ymed,na.rm=TRUE)) %>%
group_by(lang,direction) %>%
summarise(xIQR = mean(xIQR,na.rm=TRUE),
yIQR = mean(yIQR,na.rm=TRUE),
x = mean(xmed,na.rm=TRUE),
y = mean(ymed,na.rm=TRUE)) %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
ggplot(medians, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")Get puppy data!
# puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>%
# clean_names() %>%
# select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
# rename(name = x1) %>%
# mutate(mean = rowMeans(.[2:10], na.rm = T)) %>%
# select(name, mean)
#
# puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>%
# clean_names() %>%
# select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
# rename(name = x1) %>%
# mutate(mean = rowMeans(.[2:17], na.rm = T)) %>%
# select(name, mean)
#
# puppies <- rbind(puppies1, puppies2)
#
# participants <- xydata %>%
# select(name, group, gender, lang) %>%
# distinct()
#
# puppies <- left_join(participants, puppies, by = "name")
#
# summary(lm(data = puppies, mean ~ lang))
#
# puppies_se <- filter(puppies, lang == "NSE")
# puppies_nse <- filter(puppies, lang == "SE")
#
# t.test(puppies_se$mean, puppies_nse$mean)
# Let's do this again but preserve puppy-level data
puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>%
clean_names() %>%
select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
rename(name = x1) %>%
gather(key = puppy, value = sec, -name) %>%
mutate(pups = case_when(
str_detect(puppy, "huskies") ~ "huskies",
str_detect(puppy, "golden") ~ "golden",
str_detect(puppy, "wawa") ~ "wawa",
str_detect(puppy, "frisby") ~ "frisby",
str_detect(puppy, "bulldog") ~ "bulldog",
str_detect(puppy, "puppy_jpg") ~ "puppy",
TRUE ~ puppy
))
puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>%
clean_names() %>%
select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
rename(name = x1) %>%
gather(key = puppy, value = sec, -name) %>%
mutate(pups = case_when(
str_detect(puppy, "huskies") ~ "huskies",
str_detect(puppy, "golden") ~ "golden",
str_detect(puppy, "wawa") ~ "wawa",
str_detect(puppy, "frisby") ~ "frisby",
str_detect(puppy, "bulldog") ~ "bulldog",
str_detect(puppy, "puppies") ~ "puppies",
TRUE ~ puppy
))
puppies <- rbind(puppies1,puppies2)
participants <- xydata %>%
select(name, group, gender, lang) %>%
distinct()
puppies <- left_join(participants, puppies, by = "name")
summary(lmer(data = puppies, sec ~ lang + (1|name) + (1|pups)))Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: sec ~ lang + (1 | name) + (1 | pups)
Data: puppies
REML criterion at convergence: 15818.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.9738 -0.5348 -0.0747 0.3391 8.2862
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.8589 0.9268
pups (Intercept) 0.1115 0.3339
Residual 1.8390 1.3561
Number of obs: 4550, groups: name, 26; pups, 7
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 2.1716 0.2656 29.2675 8.178 4.78e-09 ***
langSE -0.5248 0.3760 23.9275 -1.396 0.176
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr)
langSE -0.545
2.5 % 97.5 %
.sig01 0.6940036 1.2121151
.sig02 0.1984206 0.6315016
.sigma 1.3285985 1.3845355
(Intercept) 1.6522810 2.6917220
langSE -1.2644534 0.2148537
First let’s prep the data.
multiples <- xydata %>%
filter(!is.na(x)) %>%
filter(!is.na(y)) %>%
group_by(name, age, lang, story, direction, trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE),
area = xIQR*yIQR,
x_90 = quantile(x, .95, na.rm=TRUE) - quantile(x, .05, na.rm=TRUE),
y_90 = quantile(y, .95, na.rm=TRUE) - quantile(y, .05, na.rm=TRUE),
area_90 = (x_90) * (y_90),
x_mean = mean(x, na.rm = TRUE),
y_mean = mean(y, na.rm = TRUE),
x_sd = sd(x, na.rm = TRUE),
y_sd = sd(y, na.rm = TRUE),
x_1sd = (x_mean+x_sd) - (x_mean-x_sd),
y_1sd = (y_mean+y_sd) - (y_mean-y_sd),
area_1sd = x_1sd * y_1sd,
x_2sd = (x_mean+(x_sd*2)) - (x_mean-(x_sd*2)),
y_2sd = (y_mean+(y_sd*2)) - (y_mean-(y_sd*2)),
area_2sd = x_2sd * y_2sd) %>%
group_by(name, lang, direction) %>%
summarise_if(is.double, funs(mean), na.rm = T) %>%
group_by(lang, direction) %>%
summarise_if(is.double, funs(mean), na.rm = T)funs() is soft deprecated as of dplyr 0.8.0
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
[90mThis warning is displayed once per session.[39m
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) Let’s see.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, xmed, ymed, xIQR, yIQR) %>%
group_by(lang, direction) %>%
summarise(xmin = xmed-(xIQR/2),
xmax = xmed+(xIQR/2),
ymin = -1*(ymed-(yIQR/2)),
ymax = -1*(ymed+(yIQR/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")So I calculated the average median across, and the middle 90% of the data.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, xmed, ymed, x_90, y_90) %>%
group_by(lang, direction) %>%
summarise(xmin = xmed-(x_90/2),
xmax = xmed+(x_90/2),
ymin = -1*(ymed-(y_90/2)),
ymax = -1*(ymed+(y_90/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")
# ggplot(filter(curr_data, lang == "NSE"), aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.2, size = 1) +
# theme_linedraw() +
# scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
# scale_y_continuous(limits = c(-720,0), expand = c(0, 0))
#
#
# ggplot(filter(curr_data, lang == "SE"), aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.2, size = 1) +
# theme_linedraw() +
# scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
# scale_y_continuous(limits = c(-720,0), expand = c(0, 0))So this is using the mean of the means, plus or minus one SD. This is equivalent to middle 68%.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, x_mean, y_mean, x_1sd, y_1sd) %>%
group_by(lang, direction) %>%
summarise(xmin = x_mean-(x_1sd/2),
xmax = x_mean+(x_1sd/2),
ymin = -1*(y_mean-(y_1sd/2)),
ymax = -1*(y_mean+(y_1sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")And this is using the mean of the means, plus or minus two SD. This is equivalent to middle 96%.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, x_mean, y_mean, x_2sd, y_2sd) %>%
group_by(lang, direction) %>%
summarise(xmin = x_mean-(x_2sd/2),
xmax = x_mean+(x_2sd/2),
ymin = -1*(y_mean-(y_2sd/2)),
ymax = -1*(y_mean+(y_2sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")